In [1]:
# Computations
import numpy as np
import pandas as pd

# preprocessing
from sklearn.impute import SimpleImputer
import re

# Visualisation libraries
## matplotlib
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever spelling this installation
# provides (falling back to the default style if neither is present).
plt.style.use(next((name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
                    if name in plt.style.available), 'default'))
# Font sizes for axis labels and tick labels, and black text throughout.
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline

## plotly
from plotly.offline import init_notebook_mode, iplot 
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%config InlineBackend.figure_format = 'retina' 

## missingno
import missingno as msno

import warnings
warnings.filterwarnings("ignore")
Telco Customer Churn

In this article, we analyze and predict customer churn for Telco Customer Churn data.

Dataset

  • Customers who left within the last month – the column is called Churn
  • Services that each customer has signed up for – phone, multiple lines, internet, online security, online backup, device protection, tech support, and streaming TV and movies
  • Customer account information – how long they’ve been a customer, contract, payment method, paperless billing, monthly charges, and total charges
  • Demographic info about customers – gender, age range, and if they have partners and dependents
Columns Description
customerID Customer ID
gender Whether the customer is a male or a female
SeniorCitizen Whether the customer is a senior citizen or not (1, 0)
Partner Whether the customer has a partner or not (Yes, No)
Dependents Whether the customer has dependents or not (Yes, No)
tenure Number of months the customer has stayed with the company
PhoneService Whether the customer has a phone service or not (Yes, No)
MultipleLines Whether the customer has multiple lines or not (Yes, No, No phone service)
InternetService Customer’s internet service provider (DSL, Fiber optic, No)
OnlineSecurity Whether the customer has online security or not (Yes, No, No internet service)
OnlineBackup Whether the customer has an online backup or not (Yes, No, No internet service)
DeviceProtection Whether the customer has device protection or not (Yes, No, No internet service)
TechSupport Whether the customer has tech support or not (Yes, No, No internet service)
StreamingTV Whether the customer has streaming TV or not (Yes, No, No internet service)
StreamingMovies Whether the customer has streaming movies or not (Yes, No, No internet service)
Contract The contract term of the customer (Month-to-month, One year, Two years)
PaperlessBilling Whether the customer has paperless billing or not (Yes, No)
PaymentMethod The customer’s payment method (Electronic check, Mailed check, Bank transfer (automatic), Credit card (automatic))
MonthlyCharges The amount charged to the customer monthly
TotalCharges The total amount charged to the customer
Churn Whether the customer churned or not (Yes or No)
In [2]:
# Read the Telco churn CSV (path is relative to the current working directory).
Data = pd.read_csv(
    filepath_or_buffer='telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv'
)

def Data_info(Inp, Only_NaN = False):
    """Summarise the dtype and missing-value count of every column of ``Inp``.

    Parameters
    ----------
    Inp : pandas.DataFrame
        Frame to inspect.
    Only_NaN : bool, default False
        When True, keep only the columns that have at least one missing value.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``Inp`` with the columns 'Data Type',
        'Number of NaN Values' and 'Percentage' (missing values as a
        percentage of the number of rows, rounded to two decimals).
    """
    dtype_frame = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    nan_frame = Inp.isnull().sum().to_frame(name = 'Number of NaN Values')
    summary = dtype_frame.join(nan_frame, how='outer')

    n_rows = Inp.shape[0]
    summary['Percentage'] = np.round(100*(summary['Number of NaN Values']/n_rows),2)

    if not Only_NaN:
        return summary
    return summary.loc[summary['Number of NaN Values']>0]

def dtypes_group(Inp):
    """Group the column names of ``Inp`` by dtype.

    Returns a DataFrame indexed by each distinct dtype found in ``Inp``, with a
    single 'Columns' column that is filled with the names of the columns of
    that dtype.
    """
    # dtype of every column, ordered by dtype
    Temp = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    # Empty result frame: one row per distinct dtype
    Out = pd.DataFrame(index =Temp['Data Type'].unique(), columns = ['Columns'])
    for c in Temp['Data Type'].unique():
        # The name list is wrapped in an outer list so it is assigned to the single matching row.
        # NOTE(review): this nested-list .loc assignment looks pandas-version sensitive — confirm.
        Out.loc[Out.index == c, 'Columns'] = [Temp.loc[Temp['Data Type'] == c].index.tolist()]
    return Out

# missingno bar chart for Data; the returned object is assigned to `_` and not used.
_ = msno.bar(
    Data,
    figsize=(16, 5),
    fontsize=14,
    log=False,
    color="#34495e",
)

def text_sep(txt):
    """Insert a space before a capital letter that directly follows a word character.

    For example 'SeniorCitizen' becomes 'Senior Citizen'. Matches do not
    overlap, so a run of capitals is split only once ('StreamingTV' becomes
    'Streaming TV').
    """
    word_char_then_capital = re.compile(r"(\w)([A-Z])")
    return word_char_then_capital.sub(r"\1 \2", txt)

def col_details(Col):
    """Print a column's title (cyan on black) followed by its distinct values.

    Parameters
    ----------
    Col : str
        Name of a column of the notebook-level ``Data`` frame.
    """
    # The Back/Fore/Style names used here originally are never imported in this
    # notebook, so calling the function raised NameError. The equivalent
    # standard ANSI escape codes are spelled out instead: black background,
    # cyan foreground, normal intensity, and reset.
    black_bg, cyan_fg, normal, reset = '\x1b[40m', '\x1b[36m', '\x1b[22m', '\x1b[0m'
    print(black_bg + cyan_fg + normal + '%s:' % text_sep(Col))
    print(reset)
    # str() every value so that numeric columns can be joined as well.
    print('%s' % ', '.join(map(str, Data[Col].unique())))
    
# Capitalise the two lower-case headers, then split every CamelCase header into words.
Data.columns = [text_sep({'gender': 'Gender', 'tenure': 'Tenure'}.get(name, name))
                for name in Data.columns.tolist()]

Exploratory Data Analysis

Customer Churn by Gender

In [3]:
# Section settings: grouping column, fill colours and outline colour.
Feature = 'Gender'
C = ['aquamarine', 'steelblue']
SC = 'Navy'

# Customers per (Feature, Churn) pair and their percentage of all customers.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
display(Temp)
Temp = Temp.reset_index()

# Horizontal bars: one bar per churn outcome, coloured by Feature.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  xaxis_range=[0, 100],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.9,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()


# Two donut charts: Feature counts among churned (left) and remaining (right) customers.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
for pie_col, (churn_value, trace_name) in enumerate([('Yes', 'Churn [Yes]'),
                                                     ('No', 'Churn [No]')], start=1):
    subset = Temp[Temp['Churn'] == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].to_numpy(),
                         values=subset['count'].to_numpy(),
                         name=trace_name, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  row=1, col=pie_col)

fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  height=400,
                  annotations=[
                      dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                      dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False),
                  ])
fig.show()
# Remove the section-specific names, including the loop helpers introduced above.
del Feature, pie_col, churn_value, trace_name, subset
count Percentage
Gender Churn
Female No 2549 36.19
Yes 939 13.33
Male No 2625 37.27
Yes 930 13.20

What stands out from the graph is that nearly 27% of the customers churned, and there is a balance between the two genders among churned customers.

Customer Churn by Senior Citizen

In [4]:
# Section settings: grouping column, fill colours and outline colour.
Feature = 'Senior Citizen'
C = ['greenyellow', 'seagreen']
SC = 'DarkGreen'

# Recode the flag (1 -> 'Yes', anything else -> 'No') on a new frame, leaving Data untouched.
Temp = Data.assign(**{Feature: np.where(Data[Feature] == 1, 'Yes', 'No')})

# Customers per (Feature, Churn) pair and their percentage of all customers.
Temp = Temp.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
display(Temp)
Temp = Temp.reset_index()

# Horizontal bars: one bar per churn outcome, coloured by Feature.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  xaxis_range=[0, 100],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.9,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()


# Two donut charts: Feature counts among churned (left) and remaining (right) customers.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
for pie_col, (churn_value, trace_name) in enumerate([('Yes', 'Churn [Yes]'),
                                                     ('No', 'Churn [No]')], start=1):
    subset = Temp[Temp['Churn'] == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].to_numpy(),
                         values=subset['count'].to_numpy(),
                         name=trace_name, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  row=1, col=pie_col)

fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  height=400,
                  annotations=[
                      dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                      dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False),
                  ])
fig.show()
# Remove the section-specific names, including the loop helpers introduced above.
del Feature, pie_col, churn_value, trace_name, subset
count Percentage
Senior Citizen Churn
No No 4508 64.01
Yes 1393 19.78
Yes No 666 9.46
Yes 476 6.76

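It can be seen that only 25.5% of the churned customers were senior citizens. Within the senior group itself, however, about 41.7% of the customers (476 of 1,142) churned.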

Customer Churn by Senior Citizen and Gender

In [5]:
# Customers per (Gender, Senior Citizen, Churn) combination and their percentage of all customers.
# .copy() so that the recoding below does not write into a slice of Data.
Temp = Data[['Gender','Senior Citizen','Churn']].copy()
Temp['Senior Citizen'] = Temp['Senior Citizen'].map(lambda x: 'Yes' if x ==1 else 'No')
Temp = Temp.groupby(['Gender','Senior Citizen','Churn'])['Churn'].agg(['count']).rename(columns = {'count':'Count'})
Temp['Percentage'] = np.round(100* Temp.values /Temp.sum().values, 2)
# Keep only the rows that have at least one non-zero value.
Temp = Temp[(Temp.T != 0).any()]
display(Temp)
Temp = Temp.reset_index(drop = False)

# Figures
C = ['greenyellow', 'seagreen']
SC = 'DarkGreen'

fig = make_subplots(rows=2, cols=1, vertical_spacing = 0.05, shared_xaxes=True,
                    subplot_titles=('Churned', 'Remaining'))
# Top: customers who churned
fig1 = px.bar(Temp.loc[Temp.Churn == 'Yes'], y= 'Senior Citizen', x= 'Percentage', orientation='h',
              color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
              color_discrete_sequence = C, height= 200)
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig1['data'][1], row=1, col=1)
fig.update_traces(marker_line_color= SC, marker_line_width=1, opacity=1, row=1, col=1)

# Bottom: customers who stayed. This previously filtered on Churn == 'Yes' again,
# so the 'Remaining' panel repeated the churned data.
fig2 = px.bar(Temp.loc[Temp.Churn == 'No'], y= 'Senior Citizen', x= 'Percentage', orientation='h',
              color = 'Gender', text = 'Percentage', hover_data= Temp.columns,
              color_discrete_sequence = C, height= 200)

fig.add_trace(fig2['data'][0], row=2, col=1)
fig.add_trace(fig2['data'][1], row=2, col=1)
# The legend entries of the top panel already cover both genders.
fig.update_traces(marker_line_color= SC, marker_line_width=1, opacity=1, showlegend = False, row=2, col=1)

# Update

fig.update_layout(height= 600)
fig.update_layout(plot_bgcolor= 'white', legend_orientation='h')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_traces(texttemplate='%{text:.2}%', textposition='outside')
# Range widened from [0, 20]: the 'Remaining' bars reach about 32%, and their
# labels are drawn outside the bars.
fig.update_xaxes(title_text='Percent', range=[0, 40], row=2, col=1)
fig.update_yaxes(title_text='Senior Citizen', row=1, col=1)
fig.update_yaxes(title_text='Senior Citizen', row=2, col=1)
fig.update_layout(title={'text': 'Customer Churn by Senior Citizen and Gender',
                         'x':0.50, 'y':0.92,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
Count Percentage
Gender Senior Citizen Churn
Female No No 2221 31.53
Yes 699 9.92
Yes No 328 4.66
Yes 240 3.41
Male No No 2287 32.47
Yes 694 9.85
Yes No 338 4.80
Yes 236 3.35

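In both genders, senior citizens make up a much smaller share of the churned customers than non-senior citizens. However, the churn rate within the senior group (about 42% for women and 41% for men) is higher than within the non-senior group (about 24% and 23%).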

Customer Churn by Partner

In [6]:
# Section settings: grouping column, fill colours and outline colour.
Feature = 'Partner'
C = ['bisque', 'orange']
SC = 'DarkOrange'

# Customers per (Feature, Churn) pair and their percentage of all customers.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
display(Temp)
Temp = Temp.reset_index()

# Horizontal bars: one bar per churn outcome, coloured by Feature.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  xaxis_range=[0, 100],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.9,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()


# Two donut charts: Feature counts among churned (left) and remaining (right) customers.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
for pie_col, (churn_value, trace_name) in enumerate([('Yes', 'Churn [Yes]'),
                                                     ('No', 'Churn [No]')], start=1):
    subset = Temp[Temp['Churn'] == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].to_numpy(),
                         values=subset['count'].to_numpy(),
                         name=trace_name, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  row=1, col=pie_col)

fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  height=400,
                  annotations=[
                      dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                      dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False),
                  ])
fig.show()
# Remove the section-specific names, including the loop helpers introduced above.
del Feature, pie_col, churn_value, trace_name, subset
count Percentage
Partner Churn
No No 2441 34.66
Yes 1200 17.04
Yes No 2733 38.80
Yes 669 9.50

Over 64% of churned customers did not have any partners.

Customer Churn by Dependents

In [7]:
# Section settings: grouping column, fill colours and outline colour.
Feature = 'Dependents'
C = ['pink', 'hotpink']
SC = 'DarkRed'

# Customers per (Feature, Churn) pair and their percentage of all customers.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
display(Temp)
Temp = Temp.reset_index()

# Horizontal bars: one bar per churn outcome, coloured by Feature.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_discrete_sequence=C, height=220)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  xaxis_range=[0, 100],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.9,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()


# Two donut charts: Feature counts among churned (left) and remaining (right) customers.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
for pie_col, (churn_value, trace_name) in enumerate([('Yes', 'Churn [Yes]'),
                                                     ('No', 'Churn [No]')], start=1):
    subset = Temp[Temp['Churn'] == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].to_numpy(),
                         values=subset['count'].to_numpy(),
                         name=trace_name, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  row=1, col=pie_col)

fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  height=400,
                  annotations=[
                      dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                      dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False),
                  ])
fig.show()
# Remove the section-specific names, including the loop helpers introduced above.
del Feature, pie_col, churn_value, trace_name, subset
count Percentage
Dependents Churn
No No 3390 48.13
Yes 1543 21.91
Yes No 1784 25.33
Yes 326 4.63

Over 82 percent of churned customers did not have any dependents.

Customer Churn by Tenure

In [8]:
Feature = 'Tenure'

# Customers per (Tenure, Churn) pair and their percentage of all customers.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
Temp = Temp.reset_index()

# First view: one horizontal bar per churn outcome, coloured by tenure on a continuous scale.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_continuous_scale='ylgn', height=450)
fig.show()


# Second view: one bar per tenure value, coloured by churn outcome.
C = ['violet', 'mediumorchid']
SC = 'Indigo'
fig = px.bar(Temp, x=Feature, y='Percentage', color='Churn', text='Percentage',
             color_discrete_sequence=C, height=500)

fig.update_traces(marker_line_color=SC, marker_line_width=1.2, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  yaxis_range=[0, 10],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.92,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.show()
del Feature

Customers with a higher tenure tend to churn less.

Customer Churn by Contract

In [9]:
# Section settings: grouping column, fill colours and outline colour.
Feature = 'Contract'
C = ['greenyellow', 'limeGreen','DarkGreen']
SC = 'DarkGreen'

# Customers per (Feature, Churn) pair and their percentage of all customers.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
display(Temp)
Temp = Temp.reset_index()

# Horizontal bars: one bar per churn outcome, coloured by Feature.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_discrete_sequence=C, height=240)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  xaxis_range=[0, 100],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.9,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()


# Two donut charts: Feature counts among churned (left) and remaining (right) customers.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
for pie_col, (churn_value, trace_name) in enumerate([('Yes', 'Churn [Yes]'),
                                                     ('No', 'Churn [No]')], start=1):
    subset = Temp[Temp['Churn'] == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].to_numpy(),
                         values=subset['count'].to_numpy(),
                         name=trace_name, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  row=1, col=pie_col)

fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  height=400,
                  annotations=[
                      dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                      dict(text='Remaining', x=0.845, y=0.5, font_size=16, showarrow=False),
                  ])
fig.show()
# Remove the section-specific names, including the loop helpers introduced above.
del Feature, pie_col, churn_value, trace_name, subset
count Percentage
Contract Churn
Month-to-month No 2220 31.52
Yes 1655 23.50
One year No 1307 18.56
Yes 166 2.36
Two year No 1647 23.38
Yes 48 0.68

The majority of churned customers were on a month-to-month base contract.

Customer Churn by Payment Method

In [10]:
# Section settings: grouping column, fill colours and outline colour.
Feature = 'Payment Method'
C = ['azure','paleturquoise','steelblue','MidnightBlue']
SC = 'Navy'

# Customers per (Feature, Churn) pair and their percentage of all customers.
Temp = Data.groupby([Feature, 'Churn'])[Feature].agg(['count'])
Temp['Percentage'] = (100 * Temp['count'] / Temp['count'].sum()).round(2)
display(Temp)
Temp = Temp.reset_index()

# Horizontal bars: one bar per churn outcome, coloured by Feature.
fig = px.bar(Temp, x='Percentage', y='Churn', color=Feature, text='Percentage',
             orientation='h', color_discrete_sequence=C, height=260)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1,
                  texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', plot_bgcolor='white',
                  xaxis_range=[0, 100],
                  title=dict(text='Customer Churn by %s' % Feature, x=0.5, y=0.9,
                             xanchor='center', yanchor='top'))
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.show()


# Two donut charts: Feature counts among churned (left) and remaining (right) customers.
fig = make_subplots(rows=1, cols=2, specs=[[dict(type='domain'), dict(type='domain')]])
for pie_col, (churn_value, trace_name) in enumerate([('Yes', 'Churn [Yes]'),
                                                     ('No', 'Churn [No]')], start=1):
    subset = Temp[Temp['Churn'] == churn_value]
    fig.add_trace(go.Pie(labels=subset[Feature].to_numpy(),
                         values=subset['count'].to_numpy(),
                         name=trace_name, textfont=dict(size=16),
                         marker=dict(colors=C, line=dict(color='black', width=1))),
                  row=1, col=pie_col)

fig.update_traces(hole=.6, marker_line_color=SC, marker_line_width=1, opacity=1)
fig.update_layout(legend_title=Feature, font=dict(size=14), legend=dict(orientation="v"),
                  height=400,
                  annotations=[
                      dict(text='Churned', x=0.18, y=0.5, font_size=16, showarrow=False),
                      dict(text='Remaining', x=0.85, y=0.5, font_size=16, showarrow=False),
                  ])
fig.show()
# Remove the section-specific names, including the loop helpers introduced above.
del Feature, pie_col, churn_value, trace_name, subset
count Percentage
Payment Method Churn
Bank transfer (automatic) No 1286 18.26
Yes 258 3.66
Credit card (automatic) No 1290 18.32
Yes 232 3.29
Electronic check No 1294 18.37
Yes 1071 15.21
Mailed check No 1304 18.51
Yes 308 4.37

Customers with an automatic payment method churned less.

Churn by Monthly Charges and Tenure

In [11]:
# Work on a copy so the assignments below never write into a slice of Data
# (the no-op 'Monthly Charges' self-assignment that used to follow was dropped).
Temp = Data[['Monthly Charges', 'Tenure', 'Churn']].copy()
# Coerce Tenure to numeric and replace anything that failed to parse by the column mean.
Temp['Tenure'] = pd.to_numeric(Temp['Tenure'], errors='coerce')
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
# The values are reshaped to (n, 1) for the imputer; ravel() flattens the result
# back to one dimension before it is stored as a column.
Temp['Tenure'] = imp.fit_transform(Temp['Tenure'].values.reshape(-1,1)).ravel()

# Bin both numeric columns into intervals.
bins = pd.IntervalIndex.from_tuples([(18, 40), (40, 60), (60, 80),(80, 100), (100, 120)])
Temp['Monthly Charges'] = pd.cut(Temp['Monthly Charges'], bins)

# NOTE(review): the intervals are right-closed by default, so a Tenure of exactly 0
# falls outside every bin and is left out of the counts below — confirm this is intended.
bins = pd.IntervalIndex.from_tuples([(0,6), (6, 12), (12, 18), (18, 24), (24, 36), (36, 60), (60, 80)])
Temp['Tenure'] = pd.cut(Temp['Tenure'], bins)

del bins

fig = make_subplots(rows=1, cols=2, shared_yaxes=True,
                    subplot_titles=('Customer Churn by Monthly Charges',
                                    'Customer Churn by Tenure'))

C = ['LightCoral', 'LimeGreen']
SC = 'Black'
# Left: percentage of the counted customers per (Monthly Charges bin, Churn) pair
Temp0 = Temp.groupby(['Monthly Charges', 'Churn'])['Churn'].agg(['count'])
Temp0['Percentage'] = np.round(100* Temp0.values /Temp0.sum().values, 2)
Temp0 = Temp0.reset_index()
# Interval labels are turned into strings before they are passed to plotly.
Temp0['Monthly Charges'] = Temp0['Monthly Charges'].astype(str)

fig1 = px.bar(Temp0, x= 'Monthly Charges', y= 'Percentage', color = 'Churn',
              text = 'Percentage', color_discrete_sequence= C)
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig1['data'][1], row=1, col=1)
# Legend entries are hidden on the left panel; the right panel keeps them.
fig.update_traces(marker_line_color= SC, marker_line_width=.8, opacity=1, showlegend = False, row=1, col=1)

# Right: percentage of the counted customers per (Tenure bin, Churn) pair
Temp0 = Temp.groupby(['Tenure', 'Churn'])['Churn'].agg(['count'])
Temp0['Percentage'] = np.round(100* Temp0.values /Temp0.sum().values, 2)
Temp0 = Temp0.reset_index()
Temp0['Tenure'] = Temp0['Tenure'].astype(str)

fig2 = px.bar(Temp0, x= 'Tenure', y= 'Percentage', color = 'Churn',
              text = 'Percentage', color_discrete_sequence= C)
fig2.update_traces(marker_line_color= SC, marker_line_width=1, opacity=1)
fig.add_trace(fig2['data'][0], row=1, col=2)
fig.add_trace(fig2['data'][1], row=1, col=2)

# Updates
fig.update_yaxes(title_text='Percentage', range=[0, 30], row=1, col=1)
fig.update_layout(barmode='stack', plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.show()

The left plot shows that the share of churned customers grows as the monthly charges increase. However, customers with monthly charges of about $100 or more churn less than customers with a monthly charge between $80 and $100.

Moreover, we can see from the right plot that the churn rate decreases as tenure increases.

Alternatively, we can also demonstrate the above figure as follows

In [12]:
# Work on a copy so the assignments below never write into a slice of Data
# (the no-op 'Monthly Charges' self-assignment that used to follow was dropped).
Temp = Data[['Monthly Charges', 'Tenure', 'Churn']].copy()
# Coerce Tenure to numeric and replace anything that failed to parse by the column mean.
Temp['Tenure'] = pd.to_numeric(Temp['Tenure'], errors='coerce')
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
# The values are reshaped to (n, 1) for the imputer; ravel() flattens the result
# back to one dimension before it is stored as a column.
Temp['Tenure'] = imp.fit_transform(Temp['Tenure'].values.reshape(-1,1)).ravel()

C = ['hotpink', 'steelblue']
SC = 'indigo'

# One point per customer, coloured by churn outcome.
fig = px.scatter(Temp, x= 'Monthly Charges', y= 'Tenure', color = 'Churn',
                  color_discrete_sequence= C, hover_data= Temp.columns, height= 600)
fig.update_traces(marker_line_color=SC, marker_line_width=0.5, opacity=1)
fig.update_layout(uniformtext_minsize= 8, uniformtext_mode='hide')
fig['layout']['xaxis'].update(range=[17, 120])
fig['layout']['yaxis'].update(range=[-1, 80])
# A stale 'Customer Churn by Gender' title used to be set here and was
# overwritten by the title below; only the background colour is kept.
fig.update_layout(plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray',
                 zeroline=True, zerolinewidth=1, zerolinecolor='Lightgray')
fig.update_layout(title={'text': 'Customer Churn by Monthly Charges and Tenure',
                         'x':0.5, 'y':0.95,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()
In [13]:
# Side-by-side box plots of Monthly Charges (left) and Tenure (right) per gender,
# split by churn outcome.
fig = make_subplots(rows=1, cols=2, subplot_titles=('by Monthly Charges and Gender', 'by Tenure and Gender'))

Colors = ['LightCoral', 'LimeGreen']
LC = 'Black'
# Left
fig1 = px.box(Data, x='Gender', y='Monthly Charges', color='Churn',
          hover_data=['Gender','Monthly Charges','Churn'], color_discrete_sequence= Colors[::-1])
fig1.update_traces(quartilemethod='linear')
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig1['data'][1], row=1, col=1)
# Legend entries are hidden on the left panel; the right panel keeps them.
fig.update_traces(marker_line_color= LC, marker_line_width=.8, opacity=1, showlegend = False, row=1, col=1)

# Right
fig2 = px.box(Data, x='Gender', y='Tenure', color='Churn',
          hover_data=['Gender','Tenure','Churn'], color_discrete_sequence= Colors[::-1])
fig2.update_traces(quartilemethod='linear')
fig.add_trace(fig2['data'][0], row=1, col=2)
fig.add_trace(fig2['data'][1], row=1, col=2)

# Updates
fig.update_layout(boxmode='group')
# Each axis is titled with the quantity it shows; both were labelled 'Percentage' before.
fig.update_yaxes(title_text='Monthly Charges', range=[-2, 140], row=1, col=1)
fig.update_yaxes(title_text='Tenure', range=[-2, 80], row=1, col=2)
# barmode='stack' was dropped here: it only applies to bar traces, and this figure has none.
fig.update_layout(plot_bgcolor= 'white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray',
                 zeroline=True, zerolinewidth=1, zerolinecolor='Lightgray')

fig.update_layout(title={'text': 'Customer Churn',
                         'x':0.5, 'y':0.88,
                         'xanchor': 'center', 'yanchor': 'top'})
fig.show()

For classification and modeling, please see the next files in the directory.